Grabbing SPINS gradients
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.4.0 v purrr 0.3.5
## v tibble 3.1.8 v dplyr 1.0.10
## v tidyr 1.2.1 v stringr 1.4.1
## v readr 2.1.3 v forcats 0.5.2
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Warning: package 'ggseg' was built under R version 4.1.3
## Warning: package 'broom' was built under R version 4.1.3
## Loading required package: prettyGraphs
## Loading required package: ExPosition
## Warning: package 'plotly' was built under R version 4.1.3
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
## Warning: package 'colorspace' was built under R version 4.1.3
##
## Attaching package: 'colorspace'
##
## The following object is masked from 'package:PTCA4CATA':
##
## lighten
## Warning: package 'tableone' was built under R version 4.1.3
## Warning: package 'RColorBrewer' was built under R version 4.1.3
## New names:
## Rows: 164640 Columns: 8
## -- Column specification
## -------------------------------------------------------- Delimiter: "," chr
## (4): ROI, Network, Subject, Site dbl (4): ...1, grad1, grad2, grad3
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `` -> `...1`
## [1] "record_id" "scanner"
## [3] "diagnostic_group" "demo_sex"
## [5] "demo_age_study_entry" "scog_rmet_total"
## [7] "scog_er40_total" "scog_tasit1_total"
## [9] "scog_tasit2_sinc" "scog_tasit2_simpsar"
## [11] "scog_tasit2_parsar" "scog_tasit3_lie"
## [13] "scog_tasit3_sar" "np_domain_tscore_process_speed"
## [15] "np_domain_tscore_att_vigilance" "np_domain_tscore_work_mem"
## [17] "np_domain_tscore_verbal_learning" "np_domain_tscore_visual_learning"
## [19] "np_domain_tscore_reasoning_ps"
## New names:
## Rows: 467 Columns: 43
## -- Column specification
## -------------------------------------------------------- Delimiter: "," chr
## (4): record_id, scanner, diagnostic_group, demo_sex dbl (36): ...1,
## demo_age_study_entry, scog_rmet_total, scog_er40_total, scog... lgl (3):
## exclude_MRI, exclude_meanFD, exclude_earlyTerm
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `` -> `...1`
grad.sub <- spins_grads_wide$Subject[order(spins_grads_wide$Subject)]
behav.sub <- lol_spins_behav$record_id[order(lol_spins_behav$record_id)]
# behav.sub[behav.sub %in% grad.sub == FALSE]
# grad.sub[grad.sub %in% behav.sub == FALSE]
# complete.cases(spins_grads_wide)
# complete.cases(lol_spins_behav)
kept.sub <- lol_spins_behav$record_id[complete.cases(lol_spins_behav)==TRUE] # 420
## grab the matching data
behav.dat <- lol_spins_behav[kept.sub,c(6:19)]
spins_grads_wide_org <- spins_grads_wide[,-1]
rownames(spins_grads_wide_org) <- spins_grads_wide$Subject
grad.dat <- spins_grads_wide_org[kept.sub,]
## variables to regress out
regout.dat <- var2regout_num[kept.sub,]
behav_all <- lol_spins_behav[kept.sub,]
table_one <- CreateTableOne(vars = colnames(behav_all)[4:19], strata="diagnostic_group",data=behav_all)
lol_demo <-
read_csv('../data/spins_lolivers_subject_info_for_grads_2022-04-21(withcomposite).csv') %>%
filter(exclude_MRI==FALSE,
exclude_meanFD==FALSE,
exclude_earlyTerm==FALSE) %>% as.data.frame
## New names:
## Rows: 467 Columns: 46
## -- Column specification
## -------------------------------------------------------- Delimiter: "," chr
## (4): record_id, scanner, diagnostic_group, demo_sex dbl (39): ...1,
## demo_age_study_entry, scog_rmet_total, scog_er40_total, scog... lgl (3):
## exclude_MRI, exclude_meanFD, exclude_earlyTerm
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `` -> `...1`
lol_demo$subject <- sub("SPN01_", "sub-", lol_demo$record_id) %>% sub("_", "", .)
rownames(lol_demo) <- lol_demo$record_id
lol_demo_match <- lol_demo[kept.sub,]
spins_demo <- lol_demo_match %>%
select(demo_sex, demo_age_study_entry, diagnostic_group, scog_rmet_total, scog_er40_total, #scog_mean_ea,
scog_tasit1_total,
scog_tasit2_total, scog_tasit3_total,np_composite_tscore, np_domain_tscore_att_vigilance,
np_domain_tscore_process_speed, np_domain_tscore_work_mem,
np_domain_tscore_verbal_learning, np_domain_tscore_visual_learning,
np_domain_tscore_reasoning_ps,
#bsfs_sec2_total, bsfs_sec3_total, bsfs_sec3_total, bsfs_sec4_total, bsfs_sec5_total, bsfs_sec6_total,
#fd_mean_rest
) %>% data.frame
colnames(spins_demo)
## [1] "demo_sex" "demo_age_study_entry"
## [3] "diagnostic_group" "scog_rmet_total"
## [5] "scog_er40_total" "scog_tasit1_total"
## [7] "scog_tasit2_total" "scog_tasit3_total"
## [9] "np_composite_tscore" "np_domain_tscore_att_vigilance"
## [11] "np_domain_tscore_process_speed" "np_domain_tscore_work_mem"
## [13] "np_domain_tscore_verbal_learning" "np_domain_tscore_visual_learning"
## [15] "np_domain_tscore_reasoning_ps"
rownames(spins_demo) <- lol_demo_match$subject
spins_demo %>%
group_by(diagnostic_group) %>%
summarise_if(is.numeric, mean, na.rm = TRUE) %>% t
## [,1] [,2]
## diagnostic_group "case" "control"
## demo_age_study_entry "31.41532" "31.94767"
## scog_rmet_total "24.56855" "27.59649"
## scog_er40_total "31.83539" "33.54651"
## scog_tasit1_total "22.50000" "24.63953"
## scog_tasit2_total "47.52823" "54.47093"
## scog_tasit3_total "48.35102" "54.71512"
## np_composite_tscore "35.42387" "49.57059"
## np_domain_tscore_att_vigilance "39.49794" "47.64912"
## np_domain_tscore_process_speed "39.69355" "53.06395"
## np_domain_tscore_work_mem "41.27016" "49.15698"
## np_domain_tscore_verbal_learning "40.66532" "50.30233"
## np_domain_tscore_visual_learning "38.72984" "48.37791"
## np_domain_tscore_reasoning_ps "42.91129" "48.75581"
spins_demo %>%
group_by(diagnostic_group) %>%
summarize_if(is.numeric, sd, na.rm = TRUE) %>% t
## [,1] [,2]
## diagnostic_group "case" "control"
## demo_age_study_entry " 9.768209" "10.395267"
## scog_rmet_total "5.258051" "3.821886"
## scog_er40_total "4.549732" "3.319822"
## scog_tasit1_total "3.640750" "2.135267"
## scog_tasit2_total "8.526169" "4.228042"
## scog_tasit3_total "7.271608" "5.264379"
## np_composite_tscore "12.93041" "11.01147"
## np_domain_tscore_att_vigilance "11.65920" "12.71612"
## np_domain_tscore_process_speed "13.16429" "10.09612"
## np_domain_tscore_work_mem "11.19371" "11.36136"
## np_domain_tscore_verbal_learning "8.937756" "9.438716"
## np_domain_tscore_visual_learning "12.45736" "10.06134"
## np_domain_tscore_reasoning_ps "10.97108" " 9.54391"
cbind(table(spins_demo$diagnostic_group, spins_demo$demo_sex), table(spins_demo$diagnostic_group))
## female male
## case 79 169 248
## control 80 92 172
table(regout.dat$demo_sex_num)
##
## 0 1
## 159 261
behav.reg <- apply(behav.dat, 2, function(x) lm(x~regout.dat$demo_sex + regout.dat$demo_age_study_entry + regout.dat$fd_mean_rest)$residual)
grad.reg <- apply(grad.dat, 2, function(x) lm(x~regout.dat$demo_sex + regout.dat$demo_age_study_entry + regout.dat$fd_mean_rest)$residual)
grad.reg2plot <- apply(grad.dat, 2, function(x){
model <- lm(x~regout.dat$demo_sex + regout.dat$demo_age_study_entry + regout.dat$fd_mean_rest)
return(model$residual + model$coefficient[1])
} )
networks <- read_delim("../networks.txt",
"\t", escape_double = FALSE, trim_ws = TRUE) %>%
select(NETWORK, NETWORKKEY, RED, GREEN, BLUE, ALPHA) %>%
distinct() %>%
add_row(NETWORK = "Subcortical", NETWORKKEY = 13, RED = 0, GREEN=0, BLUE=0, ALPHA=255) %>%
mutate(hex = rgb(RED, GREEN, BLUE, maxColorValue = 255)) %>%
arrange(NETWORKKEY)
## Rows: 718 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr (4): LABEL, HEMISPHERE, NETWORK, GLASSERLABELNAME
## dbl (8): INDEX, KEYVALUE, RED, GREEN, BLUE, ALPHA, NETWORKKEY, NETWORKSORTED...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
networks$hex <- darken(networks$hex, 0.2)
# oi <- networks$hex
# swatchplot(
# "-40%" = lighten(oi, 0.4),
# "-20%" = lighten(oi, 0.2),
# " 0%" = oi,
# " 20%" = darken(oi, 0.2),
# " 25%" = darken(oi, 0.25),
# " 30%" = darken(oi, 0.3),
# " 35%" = darken(oi, 0.35),
# off = c(0, 0)
# )
# networks
## match ROIs to networks
ROI.network.match <- cbind(spins_grads$ROI, spins_grads$Network) %>% unique
ROI.idx <- ROI.network.match[,2]
names(ROI.idx) <- ROI.network.match[,1]
### match networks with colors
net.col.idx <- networks$hex
names(net.col.idx) <- networks$NETWORK
## design matrix for subjects
sub.dx <- spins_dx_org[kept.sub,]
diagnostic.dx <- sub.dx$diagnostic_group %>% as.matrix
diagnostic.dx <- recode(diagnostic.dx, !!!c("case" = "SSD"))
diagnostic.col.idx <- c("SSD" = "darkorchid3",
"control" = "gray50")
diagnostic.col <- list()
diagnostic.col$oc <- recode(diagnostic.dx, !!!diagnostic.col.idx) %>% as.matrix()
diagnostic.col$gc <- diagnostic.col.idx %>% as.matrix
## design matrix for columns - behavioral
behav.dx <- matrix(nrow = ncol(behav.dat), ncol = 1, dimnames = list(colnames(behav.dat), "type")) %>% as.data.frame
behav.col <- c("scog" = "#D97614",#"#F28E2B",
"np" = "#3F7538",#"#59A14F",
"bsfs" = "#D37295")
behav.dx$type <- sub("(^[^_]+).*", "\\1", colnames(behav.dat))
behav.dx$type.col <- recode(behav.dx$type, !!!behav.col)
## design matrix for columns - gradient
grad.dx <- matrix(nrow = ncol(grad.dat), ncol = 4, dimnames = list(colnames(grad.dat), c("gradient", "ROI", "network", "network.col"))) %>% as.data.frame
grad.dx$gradient <- sub("(^[^.]+).*", "\\1", colnames(grad.dat))
grad.dx$ROI <- sub("^[^.]+.(*)", "\\1", colnames(grad.dat))
grad.dx$network <- recode(grad.dx$ROI, !!!ROI.idx)
grad.dx$network.col <- recode(grad.dx$network, !!!net.col.idx)
## get different alpha for gradients
grad.col.idx <- c("grad1" = "grey30",
"grad2" = "grey60",
"grad3" = "grey90")
grad.dx$gradient.col <- recode(grad.dx$gradient, !!!grad.col.idx)
## for heatmap
col.heat <- colorRampPalette(c("red", "white", "blue"))(256)
pls.res <- tepPLS(behav.reg, grad.reg, DESIGN = sub.dx$diagnostic_group, make_design_nominal = TRUE, graphs = FALSE)
pls.boot <- data4PCCAR::Boot4PLSC(behav.reg, grad.reg, scale1 = "SS1", scale2 = "SS1", nIter = 1000, nf2keep = 5, eig = TRUE)
## Registered S3 method overwritten by 'data4PCCAR':
## method from
## print.str_colorsOfMusic PTCA4CATA
pls.boot$bootRatiosSignificant.j[abs(pls.boot$bootRatios.j) < 2.88] <- FALSE
pls.boot$bootRatiosSignificant.i[abs(pls.boot$bootRatios.i) < 2.88] <- FALSE
pls.inf <- data4PCCAR::perm4PLSC(behav.reg, grad.reg, scale1 = "SS1", scale2 = "SS1", nIter = 1000)
# ## swith direction for dimension 3
pls.res$TExPosition.Data$fi[,1] <- pls.res$TExPosition.Data$fi[,1]*-1
pls.res$TExPosition.Data$fj[,1] <- pls.res$TExPosition.Data$fj[,1]*-1
pls.res$TExPosition.Data$pdq$p[,1] <- pls.res$TExPosition.Data$pdq$p[,1]*-1
pls.res$TExPosition.Data$pdq$q[,1] <- pls.res$TExPosition.Data$pdq$q[,1]*-1
pls.res$TExPosition.Data$lx[,1] <- pls.res$TExPosition.Data$lx[,1]*-1
pls.res$TExPosition.Data$ly[,1] <- pls.res$TExPosition.Data$ly[,1]*-1
## Scree plot
PlotScree(pls.res$TExPosition.Data$eigs,
p.ev = pls.inf$pEigenvalues)
## Print singular values
pls.res$TExPosition.Data$pdq$Dv
## [1] 7.1330565 2.1157103 1.9079739 1.6310932 1.5321817 1.4132437 1.2515730
## [8] 1.1846931 1.1177672 1.0190911 0.9319565 0.9106204 0.8248812 0.7818378
## Print eigenvalues
pls.res$TExPosition.Data$eigs
## [1] 50.8804950 4.4762303 3.6403644 2.6604652 2.3475808 1.9972578
## [7] 1.5664349 1.4034979 1.2494036 1.0385467 0.8685430 0.8292296
## [13] 0.6804289 0.6112703
pls.res$TExPosition.Data$t
## [1] 68.5261515 6.0286134 4.9028643 3.5831302 3.1617357 2.6899186
## [7] 2.1096838 1.8902392 1.6827041 1.3987209 1.1697588 1.1168113
## [13] 0.9164057 0.8232624
## Compare the inertia to the largest possible inertia
sum(cor(behav.dat, grad.dat)^2)
## [1] 81.59259
sum(cor(behav.dat, grad.dat)^2)/(ncol(behav.dat)*ncol(grad.dat))
## [1] 0.004955818
Here, we show that the effect that PLSC decomposes is pretty small to begin with. The effect size of the correlation between the two tables is 92.40 which accounts for 0.0065 of the largest possible effect.
lxly.out[[1]]
lx1.ssd <- pls.res$TExPosition.Data$lx[which(sub.dx$diagnostic_group == "case"), 1]
lx1.hc <- pls.res$TExPosition.Data$lx[which(sub.dx$diagnostic_group == "control"), 1]
gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)
PrettyBarPlot2(pls.res$TExPosition.Data$fi[,1],
threshold = 0,
color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,1] == TRUE, behav.dx$type.col, "grey90"),
horizontal = FALSE, main = "Scores - behavioural")
## Warning: Vectorized input to `element_text()` is not officially supported.
## i Results may be unexpected or may change in future versions of ggplot2.
cor.heat <- pls.res$TExPosition.Data$X %>% heatmap(col = col.heat)
## control
grad.dat.ctrl <- grad.dat[sub.dx$diagnostic_group == "control",]
behav.dat.ctrl <- behav.dat[sub.dx$diagnostic_group == "control",]
corX.ctrl <- cor(as.matrix(behav.dat.ctrl),as.matrix(grad.dat.ctrl))
heatmap(corX.ctrl[cor.heat$rowInd, cor.heat$colInd], col = col.heat, Rowv = NA, Colv = NA)
## case
grad.dat.case <- grad.dat[sub.dx$diagnostic_group == "case",]
behav.dat.case <- behav.dat[sub.dx$diagnostic_group == "case",]
corX.case <- cor(as.matrix(behav.dat.case),as.matrix(grad.dat.case))
heatmap(corX.case[cor.heat$rowInd, cor.heat$colInd], col = col.heat, Rowv = NA, Colv = NA)
\[CV_{control} = \] 1.06 % bootstrap CI: [0.82, 1.37]
\[CV_{SSD} = \] 2.33 % bootstrap CI: [1.81, 3.06]
lxly.out[[2]]
gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)
PrettyBarPlot2(pls.res$TExPosition.Data$fi[,2],
threshold = 0, color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,2] == TRUE, behav.dx$type.col, "grey90"),
horizontal = FALSE, main = "Scores - behavioural")
dim1.est <- pls.res$TExPosition.Data$pdq$Dv[1]*as.matrix(pls.res$TExPosition.Data$pdq$p[,1], ncol = 1) %*% t(as.matrix(pls.res$TExPosition.Data$pdq$q[,1], ncol = 1))
cor.heat.res1 <- (pls.res$TExPosition.Data$X - dim1.est) %>% heatmap(col = col.heat)
lxly.out[[3]]
gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)
PrettyBarPlot2(pls.res$TExPosition.Data$fi[,3],
threshold = 0, color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,3] == TRUE, behav.dx$type.col, "grey90"),
horizontal = FALSE, main = "Scores - behavioural")
dim2.est <- (as.matrix(pls.res$TExPosition.Data$pdq$p[,1:2]) %*% pls.res$TExPosition.Data$pdq$Dd[1:2,1:2] %*% t(as.matrix(pls.res$TExPosition.Data$pdq$q[,1:2])))
cor.heat.res2 <- heatmap(pls.res$TExPosition.Data$X - dim2.est, col = col.heat)
lxly.out[[4]]
gridExtra::grid.arrange(bar.grad1, bar.grad2, bar.grad3, ncol = 1)
PrettyBarPlot2(pls.res$TExPosition.Data$fi[,4],
threshold = 0,
color4bar = ifelse(pls.boot$bootRatiosSignificant.i[,4] == TRUE, behav.dx$type.col, "grey90"),
horizontal = FALSE, main = "Scores - behavioural")
dim3.est <- (as.matrix(pls.res$TExPosition.Data$pdq$p[,1:3]) %*% pls.res$TExPosition.Data$pdq$Dd[1:3,1:3] %*% t(as.matrix(pls.res$TExPosition.Data$pdq$q[,1:3])))
cor.heat.res3 <- heatmap(pls.res$TExPosition.Data$X - dim3.est, col = col.heat)
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
## merging atlas and data by 'label'
3D plot of the gradients
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
We need to interpret the arrows with cautious, because only the direction and the magnitude are meaningful but not the end point.
We need to interpret the arrows with cautious, because only the direction and the magnitude are meaningful but not the end point.
We need to interpret the arrows with cautious, because only the direction and the magnitude are meaningful but not the end point.
behav_sympt <- read.csv("../data/spins_behav_data_full_03-03-2022.csv") %>%
select(record_id, bsfs_total, qls_total, bprs_factor_total, sans_total_sc)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string
rownames(behav_sympt) <- behav_sympt$record_id
lol_spins_behav_smp <- cbind(lol_spins_behav, behav_sympt[lol_spins_behav$record_id, ])
lol_spins_behav_ssd <- lol_spins_behav_smp[lol_spins_behav_smp$diagnostic_group == "case",]
lol_spins_behav_ssd <- lol_spins_behav_ssd[complete.cases(lol_spins_behav_ssd),] # removing two people with missing qls
## SSD subjects
ssd.sub <- lol_spins_behav_ssd$record_id
## design matrix for subjects
spins_dx_ssd <- lol_spins_behav_ssd %>%
select(subject,scanner,diagnostic_group,demo_sex,demo_age_study_entry)
## numeric data
spins_symp_ssd <- lol_spins_behav_ssd %>%
select(
bsfs_total, qls_total, bprs_factor_total, sans_total_sc
) %>% data.frame
rownames(spins_symp_ssd) <- lol_spins_behav_ssd$record_id
## select lx/ly
ssd.lx <- pls.res$TExPosition.Data$lx[ssd.sub,]
ssd.ly <- pls.res$TExPosition.Data$ly[ssd.sub,]
## color for symptoms measures
## design matrix for columns - behavioral
sympt.dx <- matrix(nrow = ncol(spins_symp_ssd), ncol = 1, dimnames = list(colnames(spins_symp_ssd), "type")) %>% as.data.frame
sympt.col <- c("bsfs" = "#D37295",
"bprs" = "#E15759",
"qls" = "#B07AA1",
"qls20" = "#B07AA1",
"sans" = "#FF9888")
sympt.dx$type <- sub("(^[^_]+).*", "\\1", colnames(spins_symp_ssd))
sympt.dx$type.col <- recode(sympt.dx$type, !!!sympt.col)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string